library(devtools) # Reproducibility (see end of file)
library(phyloseq) # Easier data manipulation
library(tidyverse) # Pretty plotting and data manipulation
library(forcats) # Recoding factors
library(cowplot) # Multiple plotting
library(d3heatmap) # For nice heatmaps
library(phytools) # Working with phylogenetic trees
library(ggtree) # Pretty ggplot-esque phylogenetic trees
library(ape) # Working with phylogenetic trees
library(DT) # Pretty Tables
source("code/set_colors.R") # Set Colors for plotting
# Set some global ggplot parameters
mytheme <- theme(legend.text = element_text(size = 8), legend.title = element_text(size = 8, face = "bold"),
plot.title = element_text(size = 10), legend.position = c(0.01, 0.9),
axis.title = element_text(size = 10, face = "bold"), axis.text = element_text(size = 8),
legend.key.width=unit(0.1,"line"), legend.key.height=unit(0.1,"line"))
# Read in the data
raw_data <- read.table(file="data/Chloroplasts_removed/old/nochloro_HNA_LNA.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
# Subset out only the Muskegon and Surface samples
muskegon_data <- raw_data %>%
dplyr::filter(Lake == "Muskegon" & Depth == "Surface") %>%
dplyr::select(norep_filter_name)
# Load in the productivity data
production <- read.csv(file="data/production_data.csv", header = TRUE) %>%
dplyr::filter(fraction == "Free" & limnion == "Surface") %>% # Select only rows that are free-living
dplyr::select(names, tot_bacprod, SD_tot_bacprod) %>% # Select relevant columns for total productivity
mutate(tot_bacprod = round(tot_bacprod, digits = 2), # Round to 2 decimals
SD_tot_bacprod = round(SD_tot_bacprod, digits = 2) ) %>%
dplyr::rename(norep_filter_name = names) %>% # Rename to match other data frame
arrange(norep_filter_name)
# Stop if the names do not match
stopifnot(muskegon_data$norep_filter_name == production$norep_filter_name)
# Combine the two muskegon data frames into one
combined_data <- left_join(muskegon_data, production, by = "norep_filter_name")
# Merge the combined data back into the original data frame (data)
data <- left_join(raw_data, combined_data, by = "norep_filter_name") %>%
dplyr::select(-c(Platform, Fraction)) %>%
mutate(Lake = factor(Lake, levels = c("Michigan","Inland", "Muskegon")))
head(data)
## samples Total.cells HNA.cells LNA.cells Total.count.sd HNA.sd LNA.sd Lake Sample_16S Season Month Year Site Depth Total_Sequences norep_filter_name tot_bacprod SD_tot_bacprod
## 1 110D2-115-2 620420.9 139880.7 480540.2 1640.275 4639.952 5795.307 Michigan 110D2F115 Winter Januari 2015 MM110 Deep 19654 110DF115 NA NA
## 2 110D2-415-2 799314.0 155198.1 644115.9 9388.845 4198.741 5493.442 Michigan 110D2F415 Spring April 2015 MM110 Deep 7951 110DF415 NA NA
## 3 110D2-515 1261369.4 362443.6 898925.9 15341.779 10504.696 6299.495 Michigan 110D2F515 Spring May 2015 MM110 Deep 16293 110DF515 NA NA
## 4 110D2-715-2 542723.6 131132.1 411591.5 6139.696 2196.008 3967.958 Michigan 110D2F715 Summer July 2015 MM110 Deep 16882 110DF715 NA NA
## 5 110D2-815 548027.9 131820.5 416207.4 3552.010 3805.294 2441.935 Michigan 110D2F815 Summer August 2015 MM110 Deep 22157 110DF815 NA NA
## 6 110D2-915-2 731931.0 189746.2 542184.8 36726.396 6634.230 30473.925 Michigan 110D2F915 Fall September 2015 MM110 Deep 16500 110DF915 NA NA
# Fix the metadata
data$Month[data$Month == "Januari"] <- "June"
data$Season[data$Season == "Winter"] <- "Summer"
# Write out the file
#write.table(data, file="data/Chloroplasts_removed/productivity_data.tsv", row.names=TRUE)
####### HELPFUL TRANSFORMED AND SUBSETTED DATA
##### Subset Muskegon Lake Data Only
muskegon <- dplyr::filter(data, Lake == "Muskegon" & Depth == "Surface") %>%
mutate(Site = factor(Site, levels = c("MOT", "MDP", "MBR", "MIN"))) %>%
filter(tot_bacprod < 90)
# Gather the data for summary statistics
df_cells <- data %>%
dplyr::select(1:4, Lake, Season:Depth) %>%
rename(Total = Total.cells, HNA = HNA.cells, LNA = LNA.cells) %>%
gather(key = FCM_type, value = num_cells, Total:LNA)
# Number of cells in HNA and LNA across all samples
# For a plot of these values, see figure 1 below
df_cells %>%
group_by(FCM_type) %>%
summarise(num_samples = n(),
mean_cells = round(mean(num_cells), digits = 2),
sd_mean_cells = round(sd(num_cells), digits = 2),
median_cells = round(median(num_cells), digits = 2)) %>%
datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)
# Are there more total cells in one lake over the other?
totcells_df <- df_cells %>%
filter(FCM_type == "Total")
# Compute the analysis of variance
totcells_aov <- aov(num_cells ~ Lake, data = totcells_df)
summary(totcells_aov) # there is a difference, but which lake?
## Df Sum Sq Mean Sq F value Pr(>F)
## Lake 2 4.332e+14 2.166e+14 37.75 2.72e-14 ***
## Residuals 170 9.755e+14 5.738e+12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Which samples are significant from each other?
TukeyHSD(totcells_aov) # Michigan is significantly different form Muskegon and Inland
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = num_cells ~ Lake, data = totcells_df)
##
## $Lake
## diff lwr upr p adj
## Inland-Michigan 3417194.2 2411737 4422651.4 0.0000000
## Muskegon-Michigan 3030294.1 1939004 4121584.0 0.0000000
## Muskegon-Inland -386900.1 -1489077 715276.9 0.6849527
# Number of cells in HNA and LNA per ecosystem
df_cells %>%
group_by(Lake, FCM_type) %>%
summarise(num_samples = n(),
mean_cells = round(mean(num_cells), digits = 2),
median_cells = round(median(num_cells), digits = 2)) %>%
datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)
# Proportion of HNA and LNA across all samples
df_cells %>%
dplyr::select(samples, Lake, FCM_type, num_cells) %>%
spread(FCM_type, num_cells) %>%
mutate(prop_HNA = HNA/Total * 100,
prop_LNA = LNA/Total * 100) %>%
dplyr::select(Lake, prop_HNA, prop_LNA) %>%
summarize(mean_HNA = round(mean(prop_HNA), digits = 2),
sd_HNA = round(sd(prop_HNA), digits = 2),
mean_LNA = round(mean(prop_LNA), digits = 2),
sd_LNA = round(sd(prop_LNA), digits = 2))
## mean_HNA sd_HNA mean_LNA sd_LNA
## 1 30.41 9.06 69.59 9.06
# Proportion of HNA and LNAper ecosystem
prop_stats <- df_cells %>%
dplyr::select(samples, Lake, FCM_type, num_cells) %>%
spread(FCM_type, num_cells) %>%
mutate(prop_HNA = HNA/Total * 100,
prop_LNA = LNA/Total * 100) %>%
dplyr::select(Lake, prop_HNA, prop_LNA) %>%
rename(HNA = prop_HNA, LNA = prop_LNA) %>%
group_by(Lake) %>%
summarize(mean_HNA = round(mean(HNA), digits = 2),
mean_LNA = round(mean(LNA), digits = 2),
min_HNA = round(min(HNA), digits = 2),
max_HNA = round(max(HNA), digits = 2),
min_LNA = round(min(LNA), digits = 2),
max_LNA = round(max(LNA), digits = 2),
num_samples = n())
datatable(prop_stats, caption = "Statistics of the percentage of each flow cytometry group across the systems", rownames = FALSE)
# Load in the data from each lake
musk_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/muskegon/muskegon_sampledata_5in10.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
mich_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/michigan/michigan_sampledata_5in10.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
inla_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/inland/inland_sampledata_5in10.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
stopifnot(colnames(musk_all_df) == colnames(mich_all_df))
stopifnot(colnames(musk_all_df) == colnames(inla_all_df))
lakes <- bind_rows(musk_all_df, mich_all_df, inla_all_df)
p1 <- ggplot(df_cells, aes(x = FCM_type, y = num_cells, fill = Lake, color = Lake, shape = Lake)) +
geom_point(size = 1.5, position = position_jitterdodge(), color = "black") +
geom_boxplot(alpha = 0.7, outlier.shape = NA, show.legend = FALSE, color = "black") +
scale_color_manual(values = lake_colors, guide = "none") +
scale_fill_manual(values = lake_colors) +
scale_shape_manual(values = lake_shapes) +
labs(y = "Number of Cells (cells/mL)", x = "FCM Type") +
mytheme + theme(legend.title = element_blank(), legend.position = c(0.01, 0.95)) +
guides(colour = guide_legend(override.aes = list(size=2.5)),
shape = guide_legend(override.aes = list(size=2.5)),
fill = guide_legend(override.aes = list(size=2.5)))
## Is there a corrlation between HNA and LNA across ecosystems?
# 1. Run the linear model
lm_allNA_corr <- lm(LNA.cells ~ HNA.cells, data = lakes)
summary(lm(LNA.cells ~ HNA.cells * Lake, data = lakes))
##
## Call:
## lm(formula = LNA.cells ~ HNA.cells * Lake, data = lakes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3651006 -558525 -113725 511977 3997810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.699e+06 3.590e+05 10.304 < 2e-16 ***
## HNA.cells 3.827e-01 1.704e-01 2.246 0.026 *
## LakeMichigan -2.995e+06 4.523e+05 -6.622 4.63e-10 ***
## LakeMuskegon -2.363e+06 5.411e+05 -4.367 2.21e-05 ***
## HNA.cells:LakeMichigan 5.404e-01 4.257e-01 1.269 0.206
## HNA.cells:LakeMuskegon 1.027e+00 2.549e-01 4.029 8.50e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1301000 on 167 degrees of freedom
## Multiple R-squared: 0.6116, Adjusted R-squared: 0.6
## F-statistic: 52.6 on 5 and 167 DF, p-value: < 2.2e-16
## 2. Extract the R2 and p-value from the linear model:
lm_allNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_allNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_allNA_corr)$coefficients[,4][2]), digits = 24), ")")
# 3. Plot it
p2 <- ggplot(lakes, aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) +
geom_point(size = 2.5, alpha = 0.9, aes(fill = Lake, shape = Lake)) +
scale_fill_manual(values = lake_colors) +
scale_shape_manual(values = lake_shapes) +
geom_smooth(method = "lm", color = "black") +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
annotate("text", x=5e+06, y=0.8e+06, label=lm_allNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme + theme(legend.position = "none") #theme(legend.title = element_blank(), legend.position = c(0.01, 0.95))
# Figure 1
plot_grid(p1, p2, ncol = 2, nrow =1, labels = c("A","B"), rel_widths = c(0.95, 1))
### MUSKEGON ONLY ANALYSIS
# 1. Run the linear model
lm_muskNA_corr <- lm(LNA.cells ~ HNA.cells, data = musk_all_df)
## 2. Extract the R2 and p-value from the linear model:
lm_muskNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_muskNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_muskNA_corr)$coefficients[,4][2]), digits = 9), ")")
musk_corr_plot <- ggplot(musk_all_df, aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") +
geom_point(size = 3, shape = 22, aes(fill = Lake)) +
geom_smooth(method = "lm", color = "black") +
ggtitle("Muskegon Lake") + scale_fill_manual(values = lake_colors) +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
annotate("text", x=5e+06, y=0.8e+06, label=lm_muskNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme
### INLAND ONLY ANALYSIS
# 1. Run the linear model
lm_inlaNA_corr <- lm(LNA.cells ~ HNA.cells, data = filter(inla_all_df, Sample_16S != "Z14003F"))
## 2. Extract the R2 and p-value from the linear model:
lm_inlaNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_inlaNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_inlaNA_corr)$coefficients[,4][2]), digits = 2), ")")
inla_corr_plot <- ggplot(filter(inla_all_df, Sample_16S != "Z14003F"), aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") +
geom_point(size = 3, shape = 22, aes(fill = Lake)) +
geom_smooth(method = "lm", color = "black") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
ggtitle("Inland Lakes") + scale_fill_manual(values = lake_colors) +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
annotate("text", x=5e+06, y=0.8e+06, label=lm_inlaNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme
### MICHIGAN ONLY ANALYSIS
# 1. Run the linear model
lm_michNA_corr <- lm(LNA.cells ~ HNA.cells, data = mich_all_df)
## 2. Extract the R2 and p-value from the linear model:
lm_michNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_michNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_michNA_corr)$coefficients[,4][2]), digits = 11), ")")
mich_corr_plot <- ggplot(mich_all_df, aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") +
geom_point(size = 3, shape = 22, aes(fill = Lake)) +
geom_smooth(method = "lm", color = "black") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
ggtitle("Lake Michigan") + scale_fill_manual(values = lake_colors) +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
annotate("text", x=5e+06, y=0.8e+06, label=lm_michNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme
plot_grid(mich_corr_plot, inla_corr_plot, musk_corr_plot,
labels = c("A", "B", "C"), nrow = 1, ncol = 3)
######################## Analysis of HNA/LNA/Total Cells vs Total Productivity
# 1. Run the linear model
lm_HNA <- lm(tot_bacprod ~ HNA.cells, data = muskegon)
## 2. Extract the R2 and p-value from the linear model:
lm_HNA_stats <- paste("atop(R^2 ==", round(summary(lm_HNA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_HNA)$coefficients[,4][2]), digits = 5), ")")
# 3. Plot it
HNA_vs_prod <- ggplot(muskegon, aes(x = HNA.cells, y = tot_bacprod)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) +
geom_point(size = 2, shape = 22, fill = "deepskyblue4") +
geom_smooth(method = "lm", color = "deepskyblue4") +
labs(y = "Total Bacterial Production \n (μg C/L/day)", x = "HNA Cell Density \n(cells/mL)") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE),
breaks = c(2e+06, 3e+06)) +
annotate("text", x=1.65e+06, y=60, label=lm_HNA_stats, parse = TRUE, color = "black", size = 3) +
mytheme
# 1. Run the linear model
lm_LNA <- lm(tot_bacprod ~ LNA.cells, data = muskegon)
## 2. Extract the R2 and p-value from the linear model:
lm_LNA_stats <- paste("atop(R^2 ==", round(summary(lm_LNA)$adj.r.squared, digits = 3), ",",
"p ==", round(unname(summary(lm_LNA)$coefficients[,4][2]), digits = 2), ")")
# 3. Plot it
LNA_vs_prod <- ggplot(muskegon, aes(x = LNA.cells, y = tot_bacprod)) +
geom_errorbarh(aes(xmin = LNA.cells - LNA.sd, xmax = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) +
geom_point(size = 2.5, shape = 22, fill = "darkgoldenrod1") +
labs(y = "Total Bacterial Production \n (μg C/L/day)", x = "LNA Cell Density \n(cells/mL)") +
geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "darkgoldenrod1") +
annotate("text", x=2.75e+06, y=60, label=lm_LNA_stats, parse = TRUE, color = "red", size = 3) +
mytheme
# 1. Run the linear model
lm_total <- lm(tot_bacprod ~ Total.cells, data = muskegon)
## 2. Extract the R2 and p-value from the linear model:
lm_total_stats <- paste("atop(R^2 ==", round(summary(lm_total)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_total)$coefficients[,4][2]), digits = 2), ")")
# 3. Plot it
Total_vs_prod <- ggplot(muskegon, aes(x = Total.cells, y = tot_bacprod)) +
geom_errorbarh(aes(xmin = Total.cells - Total.count.sd, xmax = Total.cells + Total.count.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) +
scale_shape_manual(values = lake_shapes) +
geom_point(size = 2.5, shape = 22, fill = "black") +
labs(y = "Total Bacterial Production \n (μg C/L/day)", x = "Total Cell Density \n(cells/mL)") +
geom_smooth(method = "lm", color = "black") +
#geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "red") +
annotate("text", x=5.25e+06, y=60, label=lm_total_stats, parse = TRUE, color = "black", size = 3) +
mytheme
#### ONLY THE THREE PLOTS
# Put all three plots together into one
plot_grid(HNA_vs_prod + theme(legend.position = "none"),
LNA_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
Total_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
labels = c("A", "B", "C"),
ncol = 3, rel_widths = c(0.95, 0.8, 0.8))
## Plot the fraction of HNA
fmusk <- muskegon %>% mutate(fHNA = HNA.cells/Total.cells)
lm_fHNA <- lm(tot_bacprod ~ fHNA, data = fmusk)
## 2. Extract the R2 and p-value from the linear model:
lm_fHNA_stats <- paste("atop(R^2 ==", round(summary(lm_fHNA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_fHNA)$coefficients[,4][2]), digits = 3), ")")
fHNA_vs_prod <- ggplot(fmusk, aes(x = fHNA, y = tot_bacprod)) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey") +
scale_shape_manual(values = c(21, 22, 23, 24)) +
geom_point(size = 3, aes(shape = Site), fill = "black") +
geom_smooth(method = "lm") +
ylab("Bacterial Production") + xlab("Fraction HNA") +
annotate("text", x= 0.22, y=60, label=lm_fHNA_stats, parse = TRUE, color = "black", size = 4) +
mytheme + theme(legend.position = c(0.85, 0.15))
plot_grid(HNA_vs_prod + theme(legend.position = "none"),
fHNA_vs_prod + theme(legend.position = c(0.85, 0.15)),
labels = c("A", "B"), ncol = 2,
align = "h")
TODO
proportionHNA_plot <- df_cells %>%
dplyr::select(samples, Lake, FCM_type, num_cells) %>%
spread(FCM_type, num_cells) %>%
mutate(prop_HNA = HNA/Total * 100,
prop_LNA = LNA/Total * 100) %>%
dplyr::select(Lake, prop_HNA, prop_LNA) %>%
rename(HNA = prop_HNA, LNA = prop_LNA) %>%
gather(key = fcm_type, value = Percentage, HNA:LNA) %>%
ggplot(aes(x = Lake, y = Percentage, color = fcm_type, fill = fcm_type)) +
facet_grid(~fcm_type) + ylab("Percentage of Total Cells") +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
geom_jitter(alpha = 0.9) +
scale_color_manual(values = fcm_colors) +
scale_fill_manual(values = fcm_colors) +
theme(legend.position = "none")
proportionHNA_plot
devtools::session_info() # This will include session info with all R package version information
## setting value
## version R version 3.5.0 (2018-04-23)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/Toronto
## date 2018-05-30
##
## package * version date source
## ade4 1.7-11 2018-04-05 CRAN (R 3.5.0)
## animation 2.5 2017-03-30 CRAN (R 3.5.0)
## ape * 5.1 2018-04-04 CRAN (R 3.5.0)
## assertthat 0.2.0 2017-04-11 CRAN (R 3.5.0)
## backports 1.1.2 2017-12-13 CRAN (R 3.5.0)
## base * 3.5.0 2018-04-24 local
## base64enc 0.1-3 2015-07-28 CRAN (R 3.5.0)
## bindr 0.1.1 2018-03-13 CRAN (R 3.5.0)
## bindrcpp * 0.2.2 2018-03-29 CRAN (R 3.5.0)
## Biobase 2.40.0 2018-05-01 Bioconductor
## BiocGenerics 0.26.0 2018-05-01 Bioconductor
## biomformat 1.8.0 2018-05-01 Bioconductor
## Biostrings 2.48.0 2018-05-01 Bioconductor
## broom 0.4.4 2018-03-29 CRAN (R 3.5.0)
## cellranger 1.1.0 2016-07-27 CRAN (R 3.5.0)
## cli 1.0.0 2017-11-05 CRAN (R 3.5.0)
## cluster 2.0.7-1 2018-04-13 CRAN (R 3.5.0)
## clusterGeneration 1.3.4 2015-02-18 CRAN (R 3.5.0)
## coda 0.19-1 2016-12-08 CRAN (R 3.5.0)
## codetools 0.2-15 2016-10-05 CRAN (R 3.5.0)
## colorspace 1.3-2 2016-12-14 CRAN (R 3.5.0)
## combinat 0.0-8 2012-10-29 CRAN (R 3.5.0)
## compiler 3.5.0 2018-04-24 local
## cowplot * 0.9.2 2017-12-17 CRAN (R 3.5.0)
## crayon 1.3.4 2017-09-16 CRAN (R 3.5.0)
## crosstalk 1.0.0 2016-12-21 CRAN (R 3.5.0)
## d3heatmap * 0.6.1.2 2018-02-01 CRAN (R 3.5.0)
## data.table 1.11.2 2018-05-08 CRAN (R 3.5.0)
## datasets * 3.5.0 2018-04-24 local
## devtools * 1.13.5 2018-02-18 CRAN (R 3.5.0)
## digest 0.6.15 2018-01-28 CRAN (R 3.5.0)
## dplyr * 0.7.5 2018-05-19 CRAN (R 3.5.0)
## DT * 0.4 2018-01-30 CRAN (R 3.5.0)
## evaluate 0.10.1 2017-06-24 CRAN (R 3.5.0)
## expm 0.999-2 2017-03-29 CRAN (R 3.5.0)
## fastmatch 1.1-0 2017-01-28 CRAN (R 3.5.0)
## forcats * 0.3.0 2018-02-19 CRAN (R 3.5.0)
## foreach 1.4.4 2017-12-12 CRAN (R 3.5.0)
## foreign 0.8-70 2017-11-28 CRAN (R 3.5.0)
## ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.5.0)
## ggtree * 1.12.0 2018-05-01 Bioconductor
## glue 1.2.0 2017-10-29 CRAN (R 3.5.0)
## graphics * 3.5.0 2018-04-24 local
## grDevices * 3.5.0 2018-04-24 local
## grid 3.5.0 2018-04-24 local
## gtable 0.2.0 2016-02-26 CRAN (R 3.5.0)
## haven 1.1.1 2018-01-18 CRAN (R 3.5.0)
## hms 0.4.2 2018-03-10 CRAN (R 3.5.0)
## htmltools 0.3.6 2017-04-28 CRAN (R 3.5.0)
## htmlwidgets 1.2 2018-04-19 CRAN (R 3.5.0)
## httpuv 1.4.3 2018-05-10 CRAN (R 3.5.0)
## httr 1.3.1 2017-08-20 CRAN (R 3.5.0)
## igraph 1.2.1 2018-03-10 CRAN (R 3.5.0)
## IRanges 2.14.10 2018-05-16 Bioconductor
## iterators 1.0.9 2017-12-12 CRAN (R 3.5.0)
## jsonlite 1.5 2017-06-01 CRAN (R 3.5.0)
## knitr 1.20 2018-02-20 CRAN (R 3.5.0)
## labeling 0.3 2014-08-23 CRAN (R 3.5.0)
## later 0.7.2 2018-05-01 CRAN (R 3.5.0)
## lattice 0.20-35 2017-03-25 CRAN (R 3.5.0)
## lazyeval 0.2.1 2017-10-29 CRAN (R 3.5.0)
## lubridate 1.7.4 2018-04-11 CRAN (R 3.5.0)
## magrittr 1.5 2014-11-22 CRAN (R 3.5.0)
## maps * 3.3.0 2018-04-03 CRAN (R 3.5.0)
## MASS 7.3-50 2018-04-30 CRAN (R 3.5.0)
## Matrix 1.2-14 2018-04-13 CRAN (R 3.5.0)
## memoise 1.1.0 2017-04-21 CRAN (R 3.5.0)
## methods * 3.5.0 2018-04-24 local
## mgcv 1.8-23 2018-01-21 CRAN (R 3.5.0)
## mime 0.5 2016-07-07 CRAN (R 3.5.0)
## mnormt 1.5-5 2016-10-15 CRAN (R 3.5.0)
## modelr 0.1.2 2018-05-11 CRAN (R 3.5.0)
## msm 1.6.6 2018-02-02 CRAN (R 3.5.0)
## multtest 2.36.0 2018-05-01 Bioconductor
## munsell 0.4.3 2016-02-13 CRAN (R 3.5.0)
## mvtnorm 1.0-7 2018-01-26 CRAN (R 3.5.0)
## nlme 3.1-137 2018-04-07 CRAN (R 3.5.0)
## numDeriv 2016.8-1 2016-08-27 CRAN (R 3.5.0)
## parallel 3.5.0 2018-04-24 local
## permute 0.9-4 2016-09-09 CRAN (R 3.5.0)
## phangorn 2.4.0 2018-02-15 CRAN (R 3.5.0)
## phyloseq * 1.24.0 2018-05-01 Bioconductor
## phytools * 0.6-44 2017-11-12 CRAN (R 3.5.0)
## pillar 1.2.2 2018-04-26 CRAN (R 3.5.0)
## pkgconfig 2.0.1 2017-03-21 CRAN (R 3.5.0)
## plotrix 3.7-1 2018-05-14 CRAN (R 3.5.0)
## plyr 1.8.4 2016-06-08 CRAN (R 3.5.0)
## png 0.1-7 2013-12-03 CRAN (R 3.5.0)
## promises 1.0.1 2018-04-13 CRAN (R 3.5.0)
## psych 1.8.4 2018-05-06 CRAN (R 3.5.0)
## purrr * 0.2.4 2017-10-18 CRAN (R 3.5.0)
## quadprog 1.5-5 2013-04-17 CRAN (R 3.5.0)
## R6 2.2.2 2017-06-17 CRAN (R 3.5.0)
## Rcpp 0.12.17 2018-05-18 CRAN (R 3.5.0)
## readr * 1.1.1 2017-05-16 CRAN (R 3.5.0)
## readxl 1.1.0 2018-04-20 CRAN (R 3.5.0)
## reshape2 1.4.3 2017-12-11 CRAN (R 3.5.0)
## rhdf5 2.24.0 2018-05-01 Bioconductor
## Rhdf5lib 1.2.1 2018-05-17 Bioconductor
## rlang 0.2.0 2018-02-20 CRAN (R 3.5.0)
## rmarkdown 1.9 2018-03-01 CRAN (R 3.5.0)
## rprojroot 1.3-2 2018-01-03 CRAN (R 3.5.0)
## rstudioapi 0.7 2017-09-07 CRAN (R 3.5.0)
## rvcheck 0.1.0 2018-05-23 CRAN (R 3.5.0)
## rvest 0.3.2 2016-06-17 CRAN (R 3.5.0)
## S4Vectors 0.18.2 2018-05-16 Bioconductor
## scales 0.5.0 2017-08-24 CRAN (R 3.5.0)
## scatterplot3d 0.3-41 2018-03-14 CRAN (R 3.5.0)
## shiny 1.1.0 2018-05-17 CRAN (R 3.5.0)
## splines 3.5.0 2018-04-24 local
## stats * 3.5.0 2018-04-24 local
## stats4 3.5.0 2018-04-24 local
## stringi 1.2.2 2018-05-02 CRAN (R 3.5.0)
## stringr * 1.3.1 2018-05-10 CRAN (R 3.5.0)
## survival 2.42-3 2018-04-16 CRAN (R 3.5.0)
## tibble * 1.4.2 2018-01-22 CRAN (R 3.5.0)
## tidyr * 0.8.1 2018-05-18 CRAN (R 3.5.0)
## tidyselect 0.2.4 2018-02-26 CRAN (R 3.5.0)
## tidytree 0.1.8 2018-04-19 CRAN (R 3.5.0)
## tidyverse * 1.2.1 2017-11-14 CRAN (R 3.5.0)
## tools 3.5.0 2018-04-24 local
## treeio 1.4.0 2018-05-01 Bioconductor
## utils * 3.5.0 2018-04-24 local
## vegan 2.5-2 2018-05-17 CRAN (R 3.5.0)
## withr 2.1.2 2018-03-15 CRAN (R 3.5.0)
## xml2 1.2.0 2018-01-24 CRAN (R 3.5.0)
## xtable 1.8-2 2016-02-05 CRAN (R 3.5.0)
## XVector 0.20.0 2018-05-01 Bioconductor
## yaml 2.1.19 2018-05-01 CRAN (R 3.5.0)
## zlibbioc 1.26.0 2018-05-01 Bioconductor